package com.kdtech.analyse.news;
import com.kdtech.analyse.AnalyseNews;
import com.kdtech.crawler.CrawlHTML;
import com.kdtech.entity.crawler.UrlMeta;
import com.kdtech.entity.data.NewsMeta;
import com.kdtech.utils.DateUtils;
import com.kdtech.utils.HtmlCleaner;

import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * News analyser for press-release detail pages on www.siemens-home.cn.
 *
 * <p>Recognises detail-page URLs and extracts title, cleaned content and
 * publication date from the page's {@code div.teaser.clearfix} container.
 */
public class SieMensHomeNewsAnalyse implements AnalyseNews{

	// Example of a detail-page URL this analyser accepts:
	//http://www.siemens-home.cn/%E6%96%B0%E9%97%BB%E8%AF%A6%E6%83%85.html?pressrelease=%E8%A5%BF%E9%97%A8%E5%AD%90%E5%85%83%E5%8A%A8%E5%8A%9B%E5%8E%A8%E6%88%BF%EF%BC%8C%E7%83%B9%E9%A5%AA%E4%BA%AB%E5%8F%97%E6%BA%90%E5%8A%A8%E5%8A%9B~8842
	/**
	 * Detail-page URL patterns, compiled once. Literal dots in the host name
	 * and in ".html" are escaped so they no longer match arbitrary characters.
	 */
	private static final Pattern[] DETAIL_PAGE_PATTERNS={
		Pattern.compile("http://www\\.siemens-home\\.cn/.*\\.html[?]pressrelease=.*~[0-9]*")};

	/** CSS selector of the container holding the title, date and body. */
	private static final String TEASER_SELECTOR="div.teaser.clearfix";

	/** Category/label fragments that are removed from the extracted content. */
	private static final String[] CATEGORY_PREFIXES={
		"公司新闻, 新闻内容 ",
		"产品新闻, 新闻内容 ",
		"嵌入式家电, 新闻内容 ",
		"公益环保, 新闻内容 ",
		"市场促销, 新闻内容 "};

	/**
	 * Tells whether the given URL is a press-release detail page.
	 *
	 * @param url the URL to test; may be {@code null}
	 * @return {@code true} if the whole URL matches one of the detail-page
	 *         patterns, {@code false} otherwise (including for {@code null})
	 */
	public boolean isDetailPage(String url) {
		if (url == null) {
			return false;
		}
		for (Pattern pattern : DETAIL_PAGE_PATTERNS) {
			if (pattern.matcher(url).matches()) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Updating an existing news item is not supported by this analyser.
	 *
	 * @param meta the news item to update (ignored)
	 * @return always {@code null}
	 */
	public NewsMeta Update(NewsMeta meta) {
		return null;
	}

	/**
	 * Parses a crawled detail page into a {@link NewsMeta}.
	 *
	 * <p>NOTE(review): the original guards had empty bodies; returning
	 * {@code null} for unusable input is the assumed intent (it mirrors what
	 * {@link #Update(NewsMeta)} returns) - confirm against callers.
	 *
	 * @param urlMeta crawl result carrying the page URL and its HTML
	 * @return the populated news item, or {@code null} when {@code urlMeta}
	 *         or its HTML is {@code null}, the URL is not a detail page, or
	 *         no title could be extracted
	 */
	public NewsMeta parserHtml(UrlMeta urlMeta) {
		if (urlMeta == null || urlMeta.getHtml() == null) {
			return null;
		}
		String htmltxt=urlMeta.getHtml();
		String url=urlMeta.getUrl();
		if (!isDetailPage(url)) {
			return null;
		}

		Document doc=Jsoup.parse(htmltxt);
		String title=doc.select(TEASER_SELECTOR+" h3").text();
		if (StringUtils.isBlank(title)) {
			// Without a title the page is not a usable news item.
			return null;
		}

		String content=HtmlCleaner.getContentHtml(url,doc.select(TEASER_SELECTOR));
		// Guard: whether the cleaner can return null is not visible from here.
		if (content != null) {
			for (String prefix : CATEGORY_PREFIXES) {
				content=content.replace(prefix, "");
			}
		}
		Long date=DateUtils.matchDate(doc.select(TEASER_SELECTOR+" p").text());

		NewsMeta news=new NewsMeta();
		news.setUrl(url);
		news.setTitle(title);
		news.setContent(content);
		news.setDate(date);
		return news;
	}

	/**
	 * Manual smoke test: crawls one sample detail page and prints the parsed result.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		SieMensHomeNewsAnalyse test=new SieMensHomeNewsAnalyse();
		String url="http://www.siemens-home.cn/%E6%96%B0%E9%97%BB%E8%AF%A6%E6%83%85.html?pressrelease=%E5%AE%89%E5%BF%83%E4%B9%8B%E5%AE%85-%E2%80%94%E2%80%94-br%E6%82%A6%E7%94%9F%E6%B4%BB%EF%BC%8C%E8%B6%8A%E8%AE%BE%E8%AE%A1%E9%95%BF%E6%B2%99%E7%AB%99%E9%A3%8E%E6%B0%B4%E4%B8%8E%E5%AE%A4%E5%86%85%E8%AE%BE%E8%AE%A1%E4%B8%93%E5%9C%BA~8841";
		UrlMeta meta=CrawlHTML.responseToURL(url);
		NewsMeta parserHtml=test.parserHtml(meta);
		System.out.println(parserHtml);
	}

	/**
	 * Reports whether already-parsed news from this source needs re-processing.
	 *
	 * @return always {@code false}
	 */
	public boolean isNeedUpdate(){
		return false;
	}
}
